library(rgdal)
library(ggplot2)
library(sf)
library(tidyverse)
library(ggmap)
# Fetch the repository snapshot as a zip archive into a temporary file,
# extract it (unzip() defaults to the current working directory), and then
# remove the temporary archive.
url <- "https://github.com/ConnorCheng2/Data-Science-for-Good/archive/refs/heads/master.zip"
tmp <- tempfile()
download.file(url = url, destfile = tmp)
unzip(zipfile = tmp)
unlink(x = tmp)
Το path πρέπει να το ορίσει ο κάθε χρήστης σωστά, σύμφωνα με το working directory στο οποίο δουλεύει. Το δικό μου είναι αυτό:
# Print the current working directory; the relative paths used below are
# resolved against it.
getwd()
## [1] "D:/DataAskiseis/Essay"
Οπότε τα αρχεία που χρειαζόμαστε θα βρίσκονται μέσα στον φάκελο “cpe-data”, με path = “../Essay/Data-Science-for-Good-master/cpe-data”.
Τα δεδομένα αυτά αφορούν παραβιάσεις και εγκλήματα που έχουν καταγραφεί από διάφορα αστυνομικά τμήματα στην Αμερική.
# List the entries of the extracted "cpe-data" folder.
list.files(path = "../Essay/Data-Science-for-Good-master/cpe-data")
## [1] "ACS_variable_descriptions.csv" "Dept_11-00091"
## [3] "Dept_23-00089" "Dept_24-00013"
## [5] "Dept_24-00098" "Dept_35-00016"
## [7] "Dept_35-00103" "Dept_37-00027"
## [9] "Dept_37-00049" "Dept_49-00009"
## [11] "Dept_49-00033" "Dept_49-00035"
## [13] "Dept_49-00081"
Μπορούμε να χρησιμοποιήσουμε τα δεδομένα από οποιοδήποτε τμήμα επιθυμούμε. Εδώ θα χρησιμοποιήσουμε τα δεδομένα από το τμήμα “Dept_37-00027”.
# List the files in the "37-00027_Shapefiles" sub-folder of "Dept_37-00027"
# and render the listing as a table.
knitr::kable(list.files(path = "../Essay/Data-Science-for-Good-master/cpe-data/Dept_37-00027/37-00027_Shapefiles"))
| x |
|---|
| APD_DIST.dbf |
| APD_DIST.sbn |
| APD_DIST.sbx |
| APD_DIST.shp |
| APD_DIST.shx |
# Read the district shapefile into an sp object with rgdal::readOGR().
# NOTE(review): rgdal has been retired from CRAN; sf::st_read() is the
# maintained replacement. Confirm before migrating, because the code below
# relies on sp-style access (shp@data, fortify(shp)).
shp <- readOGR(dsn = "../Essay/Data-Science-for-Good-master/cpe-data/Dept_37-00027/37-00027_Shapefiles/APD_DIST.shp")
## OGR data source with driver: ESRI Shapefile
## Source: "D:\DataAskiseis\Essay\Data-Science-for-Good-master\cpe-data\Dept_37-00027\37-00027_Shapefiles\APD_DIST.shp", layer: "APD_DIST"
## with 53 features
## It has 19 fields
# Check which class readOGR() returned.
class(shp)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# Show the attribute field names attached to the polygons.
names(shp)
## [1] "NAME" "SORTORDER" "BATID" "JURIID" "COLOR"
## [6] "CODE" "EXTERNALKE" "BATTALIONC" "DISTRICT" "SECTOR"
## [11] "INPUT_DATE" "MODIFIED_D" "INPUT_BY" "MODIFIED_B" "BUREAU"
## [16] "PATROL_ARE" "AREACOMMAN" "SHAPE_AREA" "SHAPE_LEN"
# Summarise every attribute column stored in the object's @data slot.
summary(shp@data)
## NAME SORTORDER BATID JURIID
## Length:53 Min. :0.0000 Min. : 21 Min. : 6.00
## Class :character 1st Qu.:0.0000 1st Qu.:164 1st Qu.: 12.00
## Mode :character Median :1.0000 Median :205 Median : 15.00
## Mean :0.6981 Mean :253 Mean : 53.38
## 3rd Qu.:1.0000 3rd Qu.:307 3rd Qu.:121.00
## Max. :1.0000 Max. :520 Max. :145.00
## COLOR CODE EXTERNALKE BATTALIONC
## Min. : 32768 Length:53 Length:53 Length:53
## 1st Qu.:16711680 Class :character Class :character Class :character
## Median :16711808 Mode :character Mode :character Mode :character
## Mean :14705527
## 3rd Qu.:16744576
## Max. :16776960
## DISTRICT SECTOR INPUT_DATE MODIFIED_D
## Length:53 Length:53 Length:53 Length:53
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## INPUT_BY MODIFIED_B BUREAU PATROL_ARE
## Length:53 Length:53 Length:53 Length:53
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## AREACOMMAN SHAPE_AREA SHAPE_LEN
## Length:53 Min. : 13987 Min. : 478.7
## Class :character 1st Qu.: 13071085 1st Qu.: 19773.8
## Mode :character Median : 88304929 Median : 48972.5
## Mean :144632577 Mean : 82607.7
## 3rd Qu.:189079669 3rd Qu.: 94701.7
## Max. :712649906 Max. :453215.1
# Convert the spatial object into a plain data frame for ggplot2; the map
# below uses its long, lat and group columns.
# NOTE(review): recent ggplot2 releases deprecate fortify() for sp objects;
# confirm against the installed version.
shp_df <- fortify(shp)
## Regions defined for each Polygons
# Confirm that fortify() produced an ordinary data frame.
class(shp_df)
## [1] "data.frame"
# Draw the district polygons from the fortified vertex table with ggplot2.
# `group` is mapped only inside geom_polygon() so that layers added later
# with a different data set do not inherit it.
# NOTE(review): the axis labels say degrees; confirm the shapefile's
# coordinate system actually is longitude/latitude.
shpMap <- ggplot(shp_df, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group), fill = "grey80") +
  coord_equal() +
  labs(
    title = "Map ",
    subtitle = "Map - Based on the Lat Long in Shape Files",
    x = "Longitude (Degrees)",
    y = "Latitude (Degrees)"
  )
shpMap
# Read the prepped CSV. skip = 1 drops the file's first line, so the
# second line supplies the column names (e.g. `Nature of Contact`).
myData <- read_csv("../Essay/Data-Science-for-Good-master/cpe-data/Dept_37-00027/37-00027_UOF-P_2014-2016_prepped.csv", skip = 1)
Βλέπουμε ότι τα δεδομένα αυτά είναι για την πόλη Austin που βρίσκεται στην πολιτεία του Texas.
# Show the first row of columns 26 and 27 as a table.
knitr::kable(myData[1, c(26, 27)])
| City | State |
|---|---|
| Austin | TX |
Οι πρώτες 4 γραμμές και οι στήλες 4 έως 8 του csv αρχείου φαίνονται παρακάτω:
# Preview rows 1 to 4 of columns 4 to 8 as a table.
knitr::kable(myData[seq_len(4), seq(4, 8)])
| Area Command | Nature of Contact | Reason Desc | Master Subject ID | Subject Sex |
|---|---|---|---|---|
| FR | VIEWED OFFENSE | NECESSARY TO EFFECT ARREST / DETENTION | 167510327: 2015541517 | M |
| GE | VIEWED OFFENSE | NECESSARY TO EFFECT ARREST / DETENTION | 459191174: 20151510003 | M |
| GE | VIEWED OFFENSE | IN CUSTODY, MAINTAINING CONTROL | 459191174: 20151510003 | M |
| HE | TRAFFIC STOP | NECESSARY TO DEFEND REPORTING OFFICER | 198377769: 2014111929 | M |
# Count how many records share each (X-Coordinate, Y-Coordinate) pair.
# Rows with a missing coordinate are dropped, the two coordinate columns are
# exposed under the short names `x` and `y`, and the result is sorted by
# descending count `n`.
# count() returns an ungrouped tibble, so no grouping from the tally leaks
# into later steps, and the columns are named explicitly rather than being
# renamed by position afterwards.
# NOTE(review): the input file name contains "UOF", so these may be
# use-of-force records rather than crimes; confirm. The variable names are
# kept because later code refers to them.
crimes <- myData %>%
  drop_na(`X-Coordinate`, `Y-Coordinate`) %>%
  count(x = `X-Coordinate`, y = `Y-Coordinate`, sort = TRUE)
# Keep only the locations that occur more than once.
crimes_gt_1 <- crimes %>%
  filter(n > 1)
# Overlay the repeated locations on the district map, sizing each point by
# its count `n`.
# alpha and color are fixed values, so they are set outside aes(). Inside
# aes() they would be treated as data and mapped through scales, and the
# points would not be drawn in "#fff000".
shpMap +
  geom_point(
    aes(x = x, y = y, size = n),
    data = crimes_gt_1,
    alpha = 0.8,
    color = "#fff000"
  ) +
  labs(title = "Crime Occurences more than Once") +
  # Hide the size legend.
  theme(legend.position = "none")
Ένας πιο όμορφος χάρτης του Austin με τα “VIEWED OFFENSE”, “TRAFFIC STOP”, “TACTICAL OPERATION”, “WARRANT SERVICE” καταγεγραμμένα πάνω σε αυτόν:
# Keep only the records whose `Nature of Contact` is one of the four
# categories shown on the map.
ts_vo <- filter(
  myData,
  `Nature of Contact` %in% c(
    "VIEWED OFFENSE",
    "TRAFFIC STOP",
    "TACTICAL OPERATION",
    "WARRANT SERVICE"
  )
)
# Factor of the contact categories, used to colour the points.
# NOTE(review): presumably named City_of_Austin so that the legend title
# reads that way; confirm.
City_of_Austin <- as.factor(ts_vo[["Nature of Contact"]])
# Use ggmap's qmplot() to draw the points on a basemap.
# NOTE(review): the "watercolor" tiles depend on an external tile provider;
# confirm they are still available with the installed ggmap version.
qmplot(
  Longitude,
  Latitude,
  data = ts_vo,
  color = City_of_Austin,
  maptype = "watercolor"
)